### Variance estimation for low income proportion: SOEP 2012 ###
### Meta-distribution: Inference on the mean ###

### Seed ###

  set.seed(6543)

### Load packages ###

  library(foreign)

### General setup ###  

  sampling.fraction <- 0.025
  scales <- seq(0.32,0.72,by=0.005) # Range of scale value (here: uniform)
  beta <- 0.5 # Which quintile for poverty line
  alpha <- 0.6 # Which fraction of quintile for poverty line

### Read data ###

  dat1 <- read.dta("/home/christian/Dokumente/Daten/equivar/wv2012.dta",convert.factors=FALSE)
  dat1 <- dat1[,c("d1110612","d1110712","i1110212","l1110212","h1110112")] # Only keep variables of interest
  names(dat1) <- c("family.size","no.children","income","ow","no.child.14") # Rename variables
  N <- dim(dat1)[1] # Population size
  n <- round(sampling.fraction*N,digits=0) # Sample size

### Calculate true lip ###
  
  true.lips <- numeric(length(scales))

  for(i in 1:length(scales)) {
    # Equivalized income
    dat1$equiv.weight <- dat1$family.size^scales[i]
    dat1[,paste("equiv.inc",i,sep=".")] <- dat1$income/dat1$equiv.weight
    # Poverty line 
    true.poverty.line <- alpha*quantile(dat1[,paste("equiv.inc",i,sep=".")],probs=beta) 
    # Poverty indicator 
    dat1$poor <- 0
    dat1$poor[dat1[,paste("equiv.inc",i,sep=".")]<=true.poverty.line] <- 1
    # Low income proportion 
    true.lips[i] <- mean(dat1$poor)
  }
  
  # True values
  true.lip <- mean(true.lips)

### Objects for results ###  
  
  LIP <- numeric(sims) # For estimates of lip
  SVAR <- numeric(sims) # Variance estimates of IF 
  SVAR.cov <- numeric(sims) # Coverage of 95%-CI

  lips <- numeric(length(scales))
  
### Simulation runs  

  for(i in 1:sims) {
    if((i%%100)==0) cat(".")
    if((i%%1000)==0) cat("\n")
    
    # Sample
    dat <- dat1[sample(1:N,size=n,rep=F),]
    
    for(j in 1:length(scales)) {
      
      # Equivalized income    
      poverty.line <- alpha*quantile(dat[,paste("equiv.inc",j,sep=".")],probs=beta) 
      dat$poor <- 0
      dat$poor[dat[,paste("equiv.inc",j,sep=".")]<=poverty.line] <- 1
      lips[j] <- mean(dat$poor)
      
      # Density estimates (bandwidth according to Berger/Skinner after Silverman)
      bandwidth <- 0.79 * (quantile(dat[,paste("equiv.inc",j,sep=".")],probs=0.75)-quantile(dat[,paste("equiv.inc",j,sep=".")],probs=0.25))* n^(-0.2)
      tmp <- density(dat[,paste("equiv.inc",j,sep=".")],bw=bandwidth)
      f1 <- tmp$y[which.min(abs(tmp$x-poverty.line))]
      f2 <- tmp$y[which.min(abs(tmp$x-poverty.line/alpha))]
      # With correction 2 of Graf/Tille 
      #a <- min(dat[,paste("equiv.inc",j,sep=".")])+1
      #f1 <- tmp$y[which.min(abs(tmp$x-log(poverty.line+a)))]/(poverty.line+a)
      #f2 <- tmp$y[which.min(abs(tmp$x-log(poverty.line/alpha+a)))]/(poverty.line/alpha+a)
      # Variance via influence function (not smoothed)
      dat$below.median <- 0
      dat$below.median[dat[,paste("equiv.inc",j,sep=".")]<=quantile(dat[,paste("equiv.inc",j,sep=".")],probs=beta)] <- 1
      dat$z1 <- 1/N * (dat$poor - lips[j])
      dat$z2 <- -alpha * 1/N * (f1/f2) * (dat$below.median-beta)
      dat[,paste("zz",j,sep="")]  <- dat$z1+dat$z2
    }
    
    # Mean estimates
    lip <- mean(lips)
    
    # Variance estimate
    dat$z <- rowMeans(dat[,paste("zz",1:length(scales),sep="")])
    s_var <- var(dat$z)*(N*(N-n))/n 
    # Covergae of 95%-CI
    SVAR.cov[i] <- true.lip < (lip+1.96*sqrt(s_var)) & true.lip > (lip-1.96*sqrt(s_var))
    
    # Store results  
    LIP[i] <- lip
    SVAR[i] <- s_var
    
  }

### Assessment ###

  # "True" results
  true.var <- sum((LIP-true.lip)^2)/sims
  true.sd <- sqrt(true.var)
  
  # Relative bias
  (mean(SVAR)-true.var)/true.var
  (mean(sqrt(SVAR))-true.sd)/true.sd
  
  # Coverage of 95%-CIs
  sum(SVAR.cov)/sims
  
### Save results ###
  
  datei <- paste("/home/christian/Dokumente/Equiv Variance/Ergebnisse/sim2_",sims,"_",round(sampling.fraction*100),".rda",sep="")
  save(list=ls(),file=datei)
  